1 Figure 1

official_birth_records = read_feather(path = str_c(IO$out_Rdata,"official_birth_records.feather"))

1.1 Births data

official_birth_records = official_birth_records %>% mutate(births_thousands = births/1000)

official_births_summary = official_birth_records %>% 
  group_by(country_area, country_area_col) %>% 
  summarize(min_births = min(births_thousands),max_births = max(births_thousands),mean_births = mean(births_thousands),
            max_date = max(date))
ref_date = as.Date("2020-01-01")

g_births = ggplot(official_birth_records, aes(x = date, y = births_thousands, col = country_area_col))
g_births = g_births +
  #### reference for variations
  geom_segment(data = official_births_summary, aes(x = ref_date, xend = ref_date, y = mean_births, yend = mean_births-5*mean_births/100),
               size = 1.5, alpha = 0.7)+
  # geom_text(data = official_births_summary, aes(x = ref_date, y = min_births-5*mean_births/100), label = "5% change from the mean",
  #           hjust = 0, vjust = 0, nudge_x = 100)+
  #### uncorrected births
  geom_line(aes(y = births_original_numbers/1000), col = "gray80")+
  #### corrected births
  geom_line()+   #geom_point(size = 0.5)+
  #### COUNTRIES
  geom_text(data = official_births_summary, aes(x = ref_date, y = max_births *1.05, label = country_area, fontface = 2), vjust = 0, hjust = 1)+
  #### settings
  scale_color_identity()+
  scale_x_date(date_minor_breaks = "1 year", date_labels = "%Y")+
  scale_y_continuous(expand = expansion(mult = c(0,0.2)))+ # sec.axis = sec_axis(~ (. - max(.))/max(.)*100, breaks = seq(-30,30,by = 10), name = "% change from the max") 
  ylab("Births (monthly in thousands)")+ xlab("Date")+
  facet_grid(country_area ~., scale = "free")+
  theme(strip.text.y = element_blank())

g_births

1.2 App data

### change
users = read_feather(path = paste0(IO$output_clue, "users.feather"))

n_users_per_country = users %>% group_by(country_area) %>% summarise(n_users = n()) %>% 
  mutate(country_area_col = dict$country_area$country_area_col[match(country_area, dict$country_area$country_area)],
         country_area = factor(country_area, levels = dict$country_area$country_area))


g_n_users = ggplot(n_users_per_country, aes(x = country_area, y = n_users/1000, fill = country_area_col))+
  coord_flip()+
  geom_bar(stat = "identity")+
  scale_fill_identity()+
  xlab("")+ylab("K# of app users")+
  theme(strip.text = element_blank(),
        axis.text.y = element_blank())+
  facet_grid(country_area ~ ., scale = "free")
g_n_users

1.3 Overall relative sex

pp = read_feather(path = "../Data/outputs/Rdata/aggregated_sex_counts_clue_July2017-June2019_incl.feather")

pp_sum = pp %>%
  filter(BC == "all", age_cat == "all") %>% 
  mutate(sex_type_str = 
           case_when(
             sex_type == "all_sex" ~ "Total Sex", 
             sex_type == "prot_sex" ~ "Protected Sex",
             sex_type == "unprot_sex" ~ "Unprotected Sex")) %>% 
  group_by(date, sex_type, sex_type_str) %>% 
  summarize(n_users = sum(n_users),
            n_any  = sum(n_any),
            n_log = sum(n),
            .groups = "drop") %>% 
  group_by(sex_type) %>% 
  mutate(relative_frequency = n_log / n_any,
         relative_frequency = relative_frequency/mean(relative_frequency)) %>% 
  arrange(sex_type , date) %>% ungroup() %>%  
  mutate(
    sex_type_str = sex_type_str %>% 
      factor(., levels = c("Total Sex","Protected Sex","Unprotected Sex"))
    )

g_sex_freq = 
  ggplot(pp_sum, aes(x = date, y = relative_frequency, col = sex_type_str))+
  geom_line()+
  ylab("Relative sex frequency (daily)")+xlab("Date")+
  #expand_limits(y = 0)+  
  scale_color_manual(name = "",values = c("gray40","steelblue2","yellow3"))+
  facet_grid(sex_type_str ~ .)+
  guides(col = FALSE) +
  theme(legend.position = "top",
        legend.direction = "horizontal",
        legend.title = element_blank(),
        legend.background = element_rect(fill = "white", color = "transparent", size = 0.5)) 
g_sex_freq

1.4 Clue screen

# import png
clue_screen = image_read("../Figures Tables Media/Media/Clue_app_screens.png")

clue_screen_ratio = image_info(clue_screen)$height / image_info(clue_screen)$width 
padding = 20
g_clue_screen = ggplot()+ coord_fixed(ratio = clue_screen_ratio)+
  background_image(clue_screen)+theme(plot.margin = margin(t=padding, l=padding, r=padding, b=padding, unit = "pt"))

1.5 Assembled Figure 1

fig_1 = arrangeGrob(
  g_births,     
  g_clue_screen,   
  g_sex_freq,
  ncol = 2, nrow = 2, 
  widths = c(1.5, 1),
  heights = c(0.55,0.45),
  layout_matrix = cbind(c(1,1), c(3,4)))

fig_1 = as_ggplot(fig_1) + 
  draw_plot_label(label = letters[1:3], size = 15,
                  x = c(0, 0.6, 0.6), y = c(1, 1, 0.45))

fig_1

2 Figure 2

# load Figure 2 data

load(file = str_c(IO$p_outputs,"Rdata/sex_models_df.Rdata"), verbose = TRUE)
## Loading objects:
##   sex_models_df
plotlist = list()
n_items = c()
cas = unique(sex_models_df$country_area)
ok = foreach(ca = cas) %do% {
  cat(ca, "\n")
  
  # retrieving the model for this country and only plotting for BC all and sex_type all
  j = which(
    (sex_models_df$country_area == ca) & 
      (sex_models_df$BC == "all") & 
      (sex_models_df$age_cat == "all") & 
      (sex_models_df$sex_type == "unprot_sex"))
  
  
  load(file = str_c(IO$p_outputs,"Rdata/sex_models_",ca,".Rdata"), verbose = TRUE)
  j = which((sapply(sex_models_this_location, "[[","BC") == "all") &
            (sapply(sex_models_this_location, "[[","age_cat") == "all") &
             (sapply(sex_models_this_location, "[[","sex_type") == "unprot_sex"))
  glm_sex_behavior = sex_models_this_location[[j]]$model
    
  # glm_sex_behavior = sex_models[[j]]$model
  
  # Visualisation of the coefficient
  g_coef = ggplot_sex_activity_glm_coefficient(
    model = glm_sex_behavior, 
    show_intercept = FALSE, 
    show_weekly_patterns = FALSE, 
    ylim = c(-0.4, 1))
  g_coef = g_coef + 
    ggtitle(ca)+
    theme(axis.text.x = element_text(size = 7))
  print(g_coef)
  
  plotlist[[ca]] = g_coef
  n_items = c(n_items, nrow(g_coef$data))
}
## Brazil - Central-West 
## Loading objects:
##   sex_models_this_location

## Brazil - Northeast 
## Loading objects:
##   sex_models_this_location

## France 
## Loading objects:
##   sex_models_this_location

## United Kingdom 
## Loading objects:
##   sex_models_this_location

## United States - California 
## Loading objects:
##   sex_models_this_location

## United States - Northeast 
## Loading objects:
##   sex_models_this_location

fig_2 = ggarrange(
  ggarrange(
    plotlist[[which(cas == "Brazil - Central-West")]], 
    plotlist[[which(cas == "United States - Northeast")]], 
    labels = c("a","b"),
    ncol = 2, nrow = 1,
    widths = n_items[which(cas %in% c("Brazil - Central-West","United States - Northeast"))] / 
      sum(n_items[which(cas %in% c("Brazil - Central-West","United States - Northeast"))])
  ),
  ggarrange(
    plotlist[[which(cas == "France")]], 
    plotlist[[which(cas == "United Kingdom")]], 
    labels = c("c","d"),
    ncol = 2, nrow = 1,
    widths = n_items[which(cas %in% c("France","United Kingdom"))] / 
      sum(n_items[which(cas %in% c("France","United Kingdom"))])
  ),
  nrow = 2, ncol = 1
)

fig_2

3 Figure 3

G_table = read_feather(path = str_c(IO$out_Rdata,"Gestation_par_table.feather"))
# t = seq(0,4*pi, by = pi/20)
# f = sin(t)
# plot(t, f, type = "l")

t = seq(30*7, 45*7)

d = foreach(ca = G_table$country_area, .combine = bind_rows) %do% {
  this_ca = G_table %>% filter(country_area == ca)
  res = this_ca[rep(1,length(t)),]
  res = res %>% mutate(t = t, d = dnorm(t, mean = this_ca$G*7, sd = Gsd0), G = this_ca$G*7)
  res
}

br = seq(min(t), max(t), by = 14)

d = d %>% 
  mutate(country_area_wrapped = str_replace(country_area, " - ","\n"),
         country_area_col = dict$country_area$country_area_col[match(country_area, dict$country_area$country_area)])

g_d = ggplot(d, aes(x = t, y = d, fill = country_area_col))+
  geom_area(alpha = 0.6)+
  geom_vline(aes(xintercept = G, col = country_area_col))+
  scale_color_identity()+
  scale_fill_identity()+
  scale_x_continuous(breaks = br , labels = br/7)+
  scale_y_continuous(breaks = NULL)+
  xlab("G, gestation duration\n(weeks)")+
  ylab(expression(paste("Density, d",tau)))+  #expression(paste("Value is ", sigma,",", R^{2},'=0.6'))
  facet_grid(country_area_wrapped ~ ., scale = "free")+
  theme(strip.text.y = element_text(angle = 0, hjust = 0))
g_d

ggplot(d, aes(x = t, y = country_area, alpha = d))+
  geom_tile()+
  scale_y_discrete(limits = rev(levels(G_table$country_area)))+
  xlab("G, gestation duration\n(weeks)")+
  ylab("Density")+
  guides(alpha = FALSE)+
  scale_alpha_continuous(range = c(0,1))+
  scale_x_continuous(breaks = br , labels = br/7)

model = image_read("../Figures Tables Media/Media/Figure 3-01.png")
model_ratio =  image_info(model)$height / image_info(model)$width 
g_model = ggplot()+ coord_fixed(ratio = model_ratio)+
  background_image(model)



models = image_read("../Figures Tables Media/Media/Figure 3-02.png")
models_ratio = image_info(models)$height / image_info(models)$width
g_models = ggplot()+ coord_fixed(ratio = models_ratio)+
  background_image(models)
fig_3 = ggarrange(
  ggarrange(
    g_model, 
    g_d,
    labels = c("a","b"),
    ncol = 2, nrow = 1,
    widths = c(1,1)
  ),
  ggarrange(
    g_models,
    labels = c("c"),
    ncol = 1, nrow = 1,
    widths = 1
  ),
  nrow = 2, ncol = 1,
  heights = c(1,1)
)

fig_3

4 Figure 4

births = read_feather(path = str_c(IO$out_Rdata, "simulated_births.feather"))

births_STL = read_feather(path = str_c(IO$out_Rdata, "simulated_births_seasonal_trends.feather"))

opt_par_df = read_feather(path = str_c(IO$out_Rdata, "optimal_parameters_and_AIC.feather"))


ca_levels = dict$country_area$country_area
births = births %>% mutate(country_area = factor(country_area, levels = ca_levels))
births_STL = births_STL %>% mutate(country_area = factor(country_area, levels = ca_levels))
opt_par_df = opt_par_df %>% mutate(country_area = factor(country_area, levels = ca_levels))


ca_wrapped_levels = dict$country_area$country_area %>% str_replace(.," - ","\n")

births = births %>% 
  mutate(country_area_wrapped = str_replace(country_area," - ","\n") %>%  
           factor(., levels = ca_wrapped_levels))

opt_par_df = opt_par_df %>% group_by(country_area, BC, sex_type) %>% 
  mutate(AIC_best_model = min(AIC),
         dAIC = AIC - AIC_best_model,
         country_area_wrapped = str_replace(country_area," - ","\n") %>%  
           factor(., levels = ca_wrapped_levels))

4.1 time-series

g_ts = ggplot(births %>%  filter(sex_type == "unprot_sex", BC == "all"), aes(x = year_month, y = sim_births, col = model))
g_ts = g_ts +
  geom_line(aes(y = births), col = "black", size = 0.7)+
  geom_line(size = 0.5)+
  geom_text(data = opt_par_df %>%  filter(sex_type == "unprot_sex", BC == "all"),
            aes(x = Inf, y = Inf, col = model, 
                label = str_c(AIC %>% round(),ifelse(dAIC > 0, str_c("\n+",dAIC %>% round()),""))), 
            hjust = 1, vjust = 1, size = 3, lineheight = 0.75)+
  guides(col = FALSE)+
  xlab("Date")+ylab("")+
  facet_grid(country_area_wrapped ~ model, scale = "free" ,switch = "y")+
  theme(strip.placement = "outside",
        strip.text.y.left = element_text(angle = 0, hjust = 0.5),
        strip.background.y = element_rect(fill = "gray90", color = NA))+
  ggtitle("Measured vs. simulated births")

g_ts

4.3 AIC

g_aic = ggplot(opt_par_df %>%  filter(BC == "all", sex_type == "unprot_sex"),
               aes(x = model, y = country_area, fill = dAIC))
g_aic = g_aic +
  geom_tile()+
  scale_fill_gradient(name = "", low = hsv(0.58,0.4,1), high = hsv(0.02,0.9,0.9))+
  xlab("")+ylab("")+
  #guides(fill = FALSE)+
  facet_grid(country_area ~ model, scale = "free")+
  ggtitle("Diff. with best AIC")+
  theme(axis.text = element_blank(),
        strip.text.y = element_blank())

g_aic

g_best_aic = ggplot(opt_par_df %>%  filter(BC == "all", sex_type == "unprot_sex", model == "A"),
                    aes(x = country_area, y = AIC_best_model))
g_best_aic = g_best_aic +
  geom_bar(stat = "identity")+ coord_flip()+
  xlab("")+
  facet_grid(country_area ~ model, scale = "free")+
  theme(axis.text = element_blank(), strip.text.x = element_blank())
g_best_aic

4.4 Phase and amplitude

4.4.1 circular plots

opt_par_df = opt_par_df %>%  
  mutate(country_area_col = country_area_col %>%  factor(., levels = dict$country_area$country_area_col),
         Amplitude = 100*alpha,
         age_cat_this_ca = get_age_cat(country_area = country_area))

DF = opt_par_df %>% 
  filter(model != "A", 
         sex_type == "unprot_sex", 
         BC == "all",
         age_cat == age_cat_this_ca)

g_F = ggplot(DF, aes(x = Tp, y = Amplitude, col = country_area_col))
g_F = g_F +
  geom_segment(aes(xend = Tp, yend = 0, linetype = model))+
  geom_point()+
  scale_color_identity(guide = "legend",
                       labels = ca_wrapped_levels, 
                       name = "location" )+
  scale_x_continuous(limits = c(0,1),breaks = seq(0,3/4,by = 1/4), labels = c("Jan","Apr","Jul","Oct"))+
  ylab("Amplitude (% from mean)")+ xlab("Fertility peak time")+
  coord_polar(theta = "x", start = 0)+
  ggtitle("Calendar year")
g_F

DF = DF %>%
  mutate(
    country = str_split_fixed(country_area, " - ",2)[,1],
    Tp_seas = (Tp-10/365 - 0.5*(country == "Brazil"))%%1)





g_F_seas = ggplot(DF, aes(x = Tp_seas, y = Amplitude, col = country_area_col))
g_F_seas = g_F_seas +
  geom_segment(aes(xend = Tp_seas, yend = 0, linetype = model))+
  geom_point()+
  scale_color_identity(guide = "legend",
                       labels = ca_wrapped_levels, 
                       name = "location" )+
  scale_y_continuous(position = "right")+
  scale_x_continuous(limits = c(0,1),breaks = seq(0,3/4,by = 1/4), labels = c("Winter\nSolstice","Spring\nEquinox","Summer\nSolstice","Autumn\nEquinox"))+
  ylab("Amplitude (% from mean)")+ xlab("Fertility peak time")+
  coord_polar(theta = "x", start = 0, clip = "off")+
  ggtitle("Solar year")
g_F_seas

4.4.2 sine curves

df_sine_curves = expand.grid(t = seq(0,12,by = 0.1), country_area = DF$country_area)
df_sine_curves = left_join(df_sine_curves, 
                           DF %>% 
                             filter(sex_type == "unprot_sex", BC == "all", model == "C") %>% 
                             select(country_area, country_area_col, country_area_wrapped,  alpha, Amplitude, Tp, Tp_seas) %>% 
                             ungroup(),
                           by = "country_area")
## Adding missing grouping variables: `BC`, `sex_type`
df_sine_curves = df_sine_curves %>% 
  mutate(
    Fertility_calendar = 100 + Amplitude * cos(t/12*2*pi - Tp*2*pi),
    Fertility_solar = 100 + Amplitude * cos(t/12*2*pi - Tp_seas*2*pi)
  )


g_fert = ggplot(df_sine_curves, aes(x = t, y = Fertility_calendar, col = country_area_col))+
  geom_hline(yintercept = 100, col = "gray75")+
  geom_line(size = 0.8)+
  scale_color_identity()+
  scale_x_continuous(breaks = seq(0,12,by = 3), minor_breaks = 0:12, labels = c("Jan","Apr","Jul","Oct","Jan"))+
  xlab("Calendar months")+
  ylab("Relative changes in fertility (%)")
g_fert

g_fert_solar = ggplot(df_sine_curves, aes(x = t, y = Fertility_solar, col = country_area_col))+
  geom_hline(yintercept = 100, col = "gray75")+
  geom_line(size = 0.8)+
  scale_color_identity()+
  scale_x_continuous(breaks = seq(0,12,by = 3), minor_breaks = 0:12, 
                     labels = c("Winter\nSolstice","Spring\nEquinox","Summer\nSolstice","Autumn\nEquinox","Winter\nSolstice"))+
  xlab("Solar time")+
  ylab("Relative changes in fertility (%)")
g_fert_solar

g_fert_curves = cowplot::plot_grid(g_fert, g_fert_solar, nrow = 1, ncol = 2)

4.5 Assembling Figure 4 together

fig_4 = ggarrange(
  ggarrange(
    g_ts,
    g_st,
    labels = c("a","b","c"),
    ncol = 2, nrow = 1,
    widths = c(3,1)
  ),
  ggarrange(
    g_F + theme(plot.title = element_text(hjust = 0.5), legend.position = "none"),
    g_F_seas + theme(plot.title = element_text(hjust = 0.5), axis.title.y = element_blank(), axis.text.y = element_blank()),
    ggplot(),
    labels = c("e","f",""),
    ncol = 3, nrow = 1,
    widths = c(1.2,1.8,2), heights = 1
  ),
  nrow = 2, ncol = 1,
  heights = c(2,1)
)

fig_4

5 Figure 4 v2

fig_4_plotlist = list()

ordered_ca = c("Brazil - Central-West","United States - California","Brazil - Northeast","United States - Northeast","France","United Kingdom")
model_colors = c("turquoise3","sienna3","maroon3")

# for each country, we put together the time-series and the seasonal trend
for(ca in ordered_ca){
  cat(ca,"\n")
  
  age_cat_this_ca = get_age_cat(country_area = ca)
  
  # time-series
  this_ca_births = births %>% 
    filter(country_area == ca,
           age_cat == age_cat_this_ca,
           BC == "all",
           sex_type == "unprot_sex")
  
  
  this_ca_opt_par_fd = opt_par_df %>%  
    filter(country_area == ca, age_cat == age_cat_this_ca, sex_type == "unprot_sex", BC == "all") %>% 
    mutate(dAIC2 = AIC - min(AIC))
  
  g_ts_ca = ggplot(this_ca_births, aes(x = year_month))
  g_ts_ca = g_ts_ca +
    geom_line(aes(y = births/1000), size = 0.7)+
    geom_line(aes(y = sim_births/1000, col = model))+
    geom_text(data = this_ca_opt_par_fd, 
              aes(x = -Inf, y = Inf, col = model, 
                  label = str_c(
                    model, " : ", 
                    AIC %>% round()#,
                    #ifelse(dAIC2 > 0, str_c("\n+",dAIC2 %>% round()),"")
                  )
              ), 
              hjust = 0, vjust = 1, size = 4, lineheight = 0.75, fontface = 2)+
    scale_y_continuous(expand = expansion(mult = c(0, 0.1))) +
    scale_x_continuous(breaks = seq(0,2020, by = 5), minor_breaks = 0:2020, expand = expansion(mult = c(0.05, 0))) +
    scale_color_manual(values = model_colors)+
    facet_grid(model ~ ., labeller = label_both)+
    theme(strip.text.y = element_blank())+
    guides(col = FALSE)+
    xlab("Date")+
    ylab("Births [thousands]")+
    ggtitle(ca)
  
  # seasonal trend
  this_cat_births_STL = births_STL  %>% 
    filter(country_area == ca,
           BC == "all",
           sex_type == "unprot_sex")
  
  g_stl_ca = ggplot(this_cat_births_STL, aes(x = month))
  g_stl_ca = g_stl_ca +
    geom_hline(yintercept = 0, col = "gray80")+
    geom_line(aes(y = births_seasonal/1000), col = "black", size = 1)+
    geom_line(aes(y = sim_births_seasonal/1000, col = model))+
    scale_x_continuous(breaks = seq(0,12,by = 3))+
    scale_color_manual(values = model_colors)+
    guides(col = FALSE)+
    xlab("Calendar months")+ylab("Seasonal trends [thousands]")+
    facet_grid(model ~ ., labeller = label_both)+
    theme(strip.text.y = element_blank())
  
  
  g_ca = cowplot::plot_grid(g_ts_ca, g_stl_ca, nrow = 1, rel_widths = c(3,1), align = "h")
  
  
  fig_4_plotlist[[ca]] = g_ca
}
## Brazil - Central-West 
## United States - California 
## Brazil - Northeast 
## United States - Northeast 
## France 
## United Kingdom
fig_4_time_series = cowplot::plot_grid(plotlist = fig_4_plotlist, nrow = 3, ncol = 2, labels = letters[1:6])
fig_4_time_series

fig_4_v2 = ggarrange(
  fig_4_time_series,
  ggarrange(
    g_F + theme(plot.title = element_text(hjust = 0.5), legend.position = "none"),
    g_F_seas + theme(plot.title = element_text(hjust = 0.5), axis.title.y = element_blank(), axis.text.y = element_blank(), 
                     legend.key.height = unit(22,"pt"), legend.spacing.y = unit(0,"pt")),
    g_fert_curves,
    labels = c("g","h","i"),
    ncol = 3, nrow = 1,
    widths = c(1.2,1.8,3), heights = 1
  ),
  nrow = 2, ncol = 1,
  heights = c(3,1)
)


fig_4_v2

6 Saving to pdf

Figures_path = "../Figures Tables Media/Figures/"
scale = 2

ggsave(plot = fig_1, filename = str_c(Figures_path, "F1.pdf"), 
       width = 17.5, height = 12, units = "cm", scale = scale)

ggsave(plot = fig_2, filename = str_c(Figures_path, "F2.pdf"), 
       width = 17.5, height = 10, units = "cm", scale = scale)

ggsave(plot = fig_3, filename = str_c(Figures_path, "F3.pdf"), 
       width = 8, height = 7, units = "cm", scale = scale)

#ggsave(plot = fig_4, filename = str_c(Figures_path, "F4.pdf"), 
#       width = 17.5, height = 13, units = "cm", scale = scale)


ggsave(plot = fig_4_v2, filename = str_c(Figures_path, "F4.pdf"), 
       width = 17.5, height = 17, units = "cm", scale = scale)